{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 前馈神经网络"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from time import time\n",
    "\n",
    "import numpy as np\n",
    "import random\n",
    "\n",
    "class Network(object):\n",
    "    def __init__(self, sizes):\n",
    "        \"\"\"\n",
    "            sizes: list类型, 代表各层包含神经元的个数\n",
    "            \n",
    "            偏差bias，权重weight使用高斯分布初始化\n",
    "        \"\"\"\n",
    "        self.num_layers = len(sizes)\n",
    "        self.sizes = sizes\n",
    "        self.biases = [np.random.randn(y,1) for y in sizes[1:]]\n",
    "        self.weights = [np.random.randn(y, x) for x,y in zip(sizes[:-1], sizes[1:])]\n",
    "        \n",
    "    def feedforward(self, a):\n",
    "        \"\"\"对于给定的输入a，返回神经网络的输出\n",
    "        \n",
    "            a: 输入向量\n",
    "        \"\"\"\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            a = sigmoid(np.dot(w, a) + b)\n",
    "        return a\n",
    "    \n",
    "    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):\n",
    "        \"\"\"使用小批量随机梯度下降算法训练神经网络\n",
    "        \n",
    "            输入：\n",
    "                training_data: a list of tuples``(x,y) \n",
    "                                    representing the training inputs and the desired outputs.\n",
    "                                    \n",
    "                epochs: 迭代次数\n",
    "                \n",
    "                mini_batch_size: 小批量的大小\n",
    "                \n",
    "                eta: 学习速率\n",
    "                \n",
    "                test_data: 测试数据集，形式跟training_data一样\"\"\"\n",
    "        if test_data: n_test = len(test_data)\n",
    "        n_train = len(training_data)\n",
    "        for j in range(epochs):\n",
    "            random.shuffle(training_data)\n",
    "            mini_batches = [training_data[k:k+mini_batch_size] for k in range(0, n_train, mini_batch_size)]\n",
    "            \n",
    "            for mini_batch in mini_batches:\n",
    "                self.updata_mini_batch(mini_batch, eta)\n",
    "                \n",
    "            if test_data:\n",
    "                print(\"Epoch {0}: {1} / {2}\".format(j, self.evaluate(test_data), n_test))\n",
    "            else:\n",
    "                print(\"Epoch {0} complete.\".format(j))\n",
    "                \n",
    "    def updata_mini_batch(self, mini_batch, eta):\n",
    "        \"\"\"随机梯度下降，使用BP算法更新神经网络的权重和偏置\n",
    "            通过计算当前 mini_batch 中的训练样本对 Network 的权重和偏置进行了更新\n",
    "        \n",
    "            输入: \n",
    "                mini_batch: a list of tuples ``(x,y)``\n",
    "                \n",
    "                eta: 学习速率learning rate\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        \n",
    "        for x,y in mini_batch:\n",
    "            delta_nabla_b, delta_nabla_w = self.backprop(x,y)\n",
    "            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]\n",
    "            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]\n",
    "            \n",
    "        self.weights = [w - (eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]\n",
    "        self.baises = [b - (eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]\n",
    "        \n",
    "    def backprop(self, x, y):\n",
    "        \"\"\"Return a tuple ``(nabla_b, nabla_w)`` representing the gradient for the cost function C_x.\"\"\"\n",
    "        nabla_b = [np.zeros(b.shape) for b in self.biases]\n",
    "        nabla_w = [np.zeros(w.shape) for w in self.weights]\n",
    "        # feedforward\n",
    "        activation = x\n",
    "        activations = [x] # list to store all the activations, layer by layer\n",
    "        zs = [] # list to store all the z vectors, layer by layer\n",
    "        for b, w in zip(self.biases, self.weights):\n",
    "            z = np.dot(w, activation) + b\n",
    "            zs.append(z)\n",
    "            activation = sigmoid(z)\n",
    "            activations.append(activation)\n",
    "            \n",
    "        # backward pass\n",
    "        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])\n",
    "        nabla_b[-1] = delta\n",
    "        nabla_w[-1] = np.dot(delta, activations[-2].transpose())\n",
    "        \n",
    "        for l in range(2, self.num_layers):\n",
    "            z = zs[-l]\n",
    "            sp = sigmoid_prime(z)\n",
    "            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp\n",
    "            nabla_b[-l] = delta\n",
    "            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())\n",
    "        return (nabla_b, nabla_w)\n",
    "    \n",
    "    def evaluate(self, test_data):\n",
    "        \"\"\"返回test_data中，正确预测的数量\"\"\"\n",
    "        test_results = [(np.argmax(self.feedforward(x)), y) for (x,y) in test_data]\n",
    "        return sum(int(x == y) for (x,y) in test_results)\n",
    "    \n",
    "    def cost_derivative(self, output_activations, y):\n",
    "        \"\"\"返回损失函数的偏导数结果\"\"\"\n",
    "        return (output_activations - y)\n",
    "    \n",
    "        \n",
    "def sigmoid(z):\n",
    "    \"\"\"The sigmoid function.\"\"\"\n",
    "    return 1.0 / (1.0 + np.exp(-z))\n",
    "\n",
    "def sigmoid_prime(z):\n",
    "    \"\"\"Derivative of the sigmoid function.\"\"\"\n",
    "    return sigmoid(z)*(1-sigmoid(z))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "这个程序对识别手写数字效果如何?好吧，让我们先加载MNIST数据。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 加载 MNIST 数据\n",
    "import mnist_loader\n",
    "training_data, validation_data, test_data = mnist_loader.load_data_wrapper()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "在加载完 MNIST 数据之后,我们将先设置一个有 30 个隐藏层神经元的 Network 。使用随机梯度下降来从 MNIST training_data 学习 30 次迭代期,小批量数据大小为 10,学习速率 η = 3.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0: 9018 / 10000\n",
      "Epoch 1: 9212 / 10000\n",
      "Epoch 2: 9280 / 10000\n",
      "Epoch 3: 9294 / 10000\n",
      "Epoch 4: 9370 / 10000\n",
      "Epoch 5: 9365 / 10000\n",
      "Epoch 6: 9367 / 10000\n",
      "Epoch 7: 9411 / 10000\n",
      "Epoch 8: 9427 / 10000\n",
      "Epoch 9: 9429 / 10000\n",
      "Epoch 10: 9418 / 10000\n",
      "Epoch 11: 9433 / 10000\n",
      "Epoch 12: 9430 / 10000\n",
      "Epoch 13: 9458 / 10000\n",
      "Epoch 14: 9455 / 10000\n",
      "Epoch 15: 9492 / 10000\n",
      "Epoch 16: 9496 / 10000\n",
      "Epoch 17: 9467 / 10000\n",
      "Epoch 18: 9495 / 10000\n",
      "Epoch 19: 9470 / 10000\n",
      "Epoch 20: 9497 / 10000\n",
      "Epoch 21: 9515 / 10000\n",
      "Epoch 22: 9504 / 10000\n",
      "Epoch 23: 9495 / 10000\n",
      "Epoch 24: 9475 / 10000\n",
      "Epoch 25: 9490 / 10000\n",
      "Epoch 26: 9497 / 10000\n",
      "Epoch 27: 9511 / 10000\n",
      "Epoch 28: 9505 / 10000\n",
      "Epoch 29: 9513 / 10000\n",
      "Training time: 236.505\n"
     ]
    }
   ],
   "source": [
    "# 设置一个有 30 个隐藏层神经元的 Network\n",
    "# 使用随机梯度下降来从 MNIST training_data 学习超过 30 次迭代期,小批量数据大小为 10,学习速率 η = 3.0\n",
    "t0 = time()\n",
    "net = Network([784, 30, 10])\n",
    "net.SGD(training_data, 30, 10, 3.0, test_data=test_data)\n",
    "print(\"Training time: %.3f\" % round(time()-t0, 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "从输出可以看到，经过训练的网络给出的识别率约为 95% —— 在峰值时为 95.27%(最后一轮)!"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "现在来看看是否增加隐藏神经元数量，会帮助我们得到更好的结果。我们重新运行上面的实验,将隐藏神经元数量改到 100。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0: 6550 / 10000\n",
      "Epoch 1: 7454 / 10000\n",
      "Epoch 2: 7572 / 10000\n",
      "Epoch 3: 7607 / 10000\n",
      "Epoch 4: 7614 / 10000\n",
      "Epoch 5: 8720 / 10000\n",
      "Epoch 6: 8755 / 10000\n",
      "Epoch 7: 8778 / 10000\n",
      "Epoch 8: 8768 / 10000\n",
      "Epoch 9: 8779 / 10000\n",
      "Epoch 10: 8781 / 10000\n",
      "Epoch 11: 8795 / 10000\n",
      "Epoch 12: 8802 / 10000\n",
      "Epoch 13: 8806 / 10000\n",
      "Epoch 14: 8806 / 10000\n",
      "Epoch 15: 8824 / 10000\n",
      "Epoch 16: 8814 / 10000\n",
      "Epoch 17: 8808 / 10000\n",
      "Epoch 18: 8821 / 10000\n",
      "Epoch 19: 8821 / 10000\n",
      "Epoch 20: 8830 / 10000\n",
      "Epoch 21: 8816 / 10000\n",
      "Epoch 22: 8838 / 10000\n",
      "Epoch 23: 8822 / 10000\n",
      "Epoch 24: 8833 / 10000\n",
      "Epoch 25: 8845 / 10000\n",
      "Epoch 26: 8839 / 10000\n",
      "Epoch 27: 8837 / 10000\n",
      "Epoch 28: 8839 / 10000\n",
      "Epoch 29: 8841 / 10000\n",
      "Training time: 1275.017\n"
     ]
    }
   ],
   "source": [
    "t0 = time()\n",
    "\n",
    "net = Network([784, 100, 10])\n",
    "net.SGD(training_data, 30, 10, 3.0, test_data=test_data)\n",
    "\n",
    "print(\"Training time: %.3f\" % round(time()-t0, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
